!pip install yfinance
Requirement already satisfied: yfinance in d:\christ\python\lib\site-packages (0.2.41) Requirement already satisfied: pandas>=1.3.0 in d:\christ\python\lib\site-packages (from yfinance) (2.0.3) Requirement already satisfied: numpy>=1.16.5 in d:\christ\python\lib\site-packages (from yfinance) (1.24.3) Requirement already satisfied: requests>=2.31 in d:\christ\python\lib\site-packages (from yfinance) (2.31.0) Requirement already satisfied: multitasking>=0.0.7 in d:\christ\python\lib\site-packages (from yfinance) (0.0.11) Requirement already satisfied: lxml>=4.9.1 in d:\christ\python\lib\site-packages (from yfinance) (4.9.3) Requirement already satisfied: platformdirs>=2.0.0 in d:\christ\python\lib\site-packages (from yfinance) (3.10.0) Requirement already satisfied: pytz>=2022.5 in d:\christ\python\lib\site-packages (from yfinance) (2023.3.post1) Requirement already satisfied: frozendict>=2.3.4 in d:\christ\python\lib\site-packages (from yfinance) (2.4.4) Requirement already satisfied: peewee>=3.16.2 in d:\christ\python\lib\site-packages (from yfinance) (3.17.6) Requirement already satisfied: beautifulsoup4>=4.11.1 in d:\christ\python\lib\site-packages (from yfinance) (4.12.2) Requirement already satisfied: html5lib>=1.1 in d:\christ\python\lib\site-packages (from yfinance) (1.1) Requirement already satisfied: soupsieve>1.2 in d:\christ\python\lib\site-packages (from beautifulsoup4>=4.11.1->yfinance) (2.4) Requirement already satisfied: six>=1.9 in d:\christ\python\lib\site-packages (from html5lib>=1.1->yfinance) (1.16.0) Requirement already satisfied: webencodings in d:\christ\python\lib\site-packages (from html5lib>=1.1->yfinance) (0.5.1) Requirement already satisfied: python-dateutil>=2.8.2 in d:\christ\python\lib\site-packages (from pandas>=1.3.0->yfinance) (2.8.2) Requirement already satisfied: tzdata>=2022.1 in d:\christ\python\lib\site-packages (from pandas>=1.3.0->yfinance) (2023.3) Requirement already satisfied: charset-normalizer<4,>=2 in 
d:\christ\python\lib\site-packages (from requests>=2.31->yfinance) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in d:\christ\python\lib\site-packages (from requests>=2.31->yfinance) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in d:\christ\python\lib\site-packages (from requests>=2.31->yfinance) (1.26.16) Requirement already satisfied: certifi>=2017.4.17 in d:\christ\python\lib\site-packages (from requests>=2.31->yfinance) (2023.7.22)
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.model_selection import train_test_split
#pip install yfinance
# Get the data for HDFCBANK (HDFC Bank listed on the NSE)
ticker = "HDFCBANK.NS"
# Download ~3 years of daily OHLCV + Adj Close, indexed by trading date
data = yf.download(ticker, start="2021-04-01", end="2024-03-31")
[*********************100%%**********************] 1 of 1 completed
data.head()
| Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2021-04-01 | 1499.400024 | 1499.400024 | 1465.000000 | 1486.750000 | 1427.248291 | 17881881 |
| 2021-04-05 | 1480.000000 | 1485.000000 | 1431.000000 | 1449.599976 | 1391.584961 | 8003293 |
| 2021-04-06 | 1460.000000 | 1462.650024 | 1432.650024 | 1440.250000 | 1382.609131 | 7537867 |
| 2021-04-07 | 1439.300049 | 1456.699951 | 1421.550049 | 1447.199951 | 1389.280884 | 12544090 |
| 2021-04-08 | 1453.000000 | 1460.900024 | 1430.500000 | 1432.800049 | 1375.457397 | 8806796 |
## 2. Select the Target Variable and Clean the data
# Select the target variable 'Adj Close' (kept as a one-column DataFrame)
df = data[['Adj Close']]
# Check for missing values
print("Missing values:")
print(df.isnull().sum())
Missing values: Adj Close 0 dtype: int64
# Plot the daily adjusted close series with matplotlib
plt.figure(figsize=(10, 5))
plt.plot(df, label='Adj Close Price')
plt.title('HDFCBANK.NS Adj Close Price')
plt.xlabel('Date')
plt.ylabel('Adj Close Price')
plt.legend()
plt.show()
import plotly.graph_objs as go
import plotly.express as px
# Interactive version of the same plot; df has a datetime index and an 'Adj Close' column
fig = px.line(df, y='Adj Close', title='HDFCBANK.NS Adj Close Price')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Adj Close',
    legend_title_text='',
    width=1000,
    height=500
)
fig.show()
from statsmodels.tsa.seasonal import seasonal_decompose
df.columns
Index(['Adj Close'], dtype='object')
from statsmodels.tsa.seasonal import seasonal_decompose
# Decompose the series into trend x seasonal x residual components.
# period=12 treats 12 observations as one seasonal cycle; on this daily
# series that is ~2.5 trading weeks — NOTE(review): confirm the intended
# period (12 looks borrowed from monthly data).
result = seasonal_decompose(df['Adj Close'], model='multiplicative', period=12)
# Plot the decomposed components on a shared date axis
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
result.observed.plot(ax=ax1)
ax1.set_ylabel('Observed')
result.trend.plot(ax=ax2)
ax2.set_ylabel('Trend')
result.seasonal.plot(ax=ax3)
ax3.set_ylabel('Seasonal')
result.resid.plot(ax=ax4)
ax4.set_ylabel('Residual')
plt.xlabel('Date')
plt.tight_layout()
plt.show()
# Split the data into training and test sets (deferred — done below on the monthly series)
#train_data, test_data = train_test_split(df, test_size=0.2, shuffle=False)
# Downsample daily prices to month-end averages ("M" = month-end frequency)
monthly_data = df.resample("M").mean()
monthly_data.head()
| Adj Close | |
|---|---|
| Date | |
| 2021-04-30 | 1371.761410 |
| 2021-05-31 | 1387.358661 |
| 2021-06-30 | 1434.235673 |
| 2021-07-31 | 1424.722836 |
| 2021-08-31 | 1460.019339 |
# Chronological 80/20 split of the monthly series (shuffle=False keeps time order)
train_data, test_data = train_test_split(monthly_data, test_size=0.2, shuffle=False)
len(monthly_data), len(train_data)
(36, 28)
from statsmodels.tsa.holtwinters import ExponentialSmoothing
# Fit Holt-Winters exponential smoothing with multiplicative yearly seasonality
holt_winters_model = ExponentialSmoothing(train_data, seasonal='mul', seasonal_periods=12).fit()
# Forecast for the next year (12 months)
holt_winters_forecast = holt_winters_model.forecast(12)
# Plot the training series against the forecast
plt.figure(figsize=(10, 5))
plt.plot(train_data, label='Observed')
plt.plot(holt_winters_forecast, label='Holt-Winters Forecast', linestyle='--')
plt.title('Holt-Winters Forecast')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
# Forecast over the held-out test period. Use len(test_data) rather than a
# hard-coded 8 so the horizon stays correct if the split ratio changes.
y_pred = holt_winters_model.forecast(len(test_data))
len(test_data), len(y_pred)
(8, 8)
y_pred, test_data
(2023-08-31 1706.390263
2023-09-30 1731.391461
2023-10-31 1731.736022
2023-11-30 1739.483559
2023-12-31 1720.176267
2024-01-31 1721.578590
2024-02-29 1704.024235
2024-03-31 1622.031853
Freq: M, dtype: float64,
Adj Close
Date
2023-08-31 1587.990284
2023-09-30 1563.766089
2023-10-31 1494.526593
2023-11-30 1486.096698
2023-12-31 1630.068201
2024-01-31 1561.023990
2024-02-29 1402.113066
2024-03-31 1423.774400)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the Holt-Winters forecast against the test set. Work on plain
# arrays: subtracting a Series from a DataFrame aligns on column labels,
# which yielded an all-NaN MAPE in the original run.
y_true = test_data['Adj Close'].values
pred = np.asarray(y_pred)
# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_true, pred))
print(f'RMSE: {rmse}')
# Compute MAE
mae = mean_absolute_error(y_true, pred)
print(f'MAE: {mae}')
# Compute MAPE (denominator is the actual value, per the standard definition)
mape = np.mean(np.abs((y_true - pred) / y_true)) * 100
print(f'MAPE: {mape}')
# Compute R-squared
r2 = r2_score(y_true, pred)
print(f'R-squared: {r2}')
RMSE: 202.1582454555789 MAE: 190.9316161273921 MAPE: nan R-squared: -6.220965966554176
# Forecast for the next year (12 months)
holt_winters_forecast = holt_winters_model.forecast(len(test_data)+12)
holt_winters_forecast
2023-08-31 1706.390263 2023-09-30 1731.391461 2023-10-31 1731.736022 2023-11-30 1739.483559 2023-12-31 1720.176267 2024-01-31 1721.578590 2024-02-29 1704.024235 2024-03-31 1622.031853 2024-04-30 1669.100690 2024-05-31 1626.215131 2024-06-30 1630.460827 2024-07-31 1649.835026 2024-08-31 1706.390263 2024-09-30 1731.391461 2024-10-31 1731.736022 2024-11-30 1739.483559 2024-12-31 1720.176267 2025-01-31 1721.578590 2025-02-28 1704.024235 2025-03-31 1622.031853 Freq: M, dtype: float64
monthly_data.columns
Index(['Adj Close'], dtype='object')
!pip install blosc2~=2.0.0
!pip install pmdarima
Requirement already satisfied: blosc2~=2.0.0 in d:\christ\python\lib\site-packages (2.0.0) Requirement already satisfied: msgpack in d:\christ\python\lib\site-packages (from blosc2~=2.0.0) (1.0.3) Requirement already satisfied: pmdarima in d:\christ\python\lib\site-packages (2.0.4) Requirement already satisfied: joblib>=0.11 in d:\christ\python\lib\site-packages (from pmdarima) (1.2.0) Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in d:\christ\python\lib\site-packages (from pmdarima) (3.0.10) Requirement already satisfied: numpy>=1.21.2 in d:\christ\python\lib\site-packages (from pmdarima) (1.24.3) Requirement already satisfied: pandas>=0.19 in d:\christ\python\lib\site-packages (from pmdarima) (2.0.3) Requirement already satisfied: scikit-learn>=0.22 in d:\christ\python\lib\site-packages (from pmdarima) (1.3.0) Requirement already satisfied: scipy>=1.3.2 in d:\christ\python\lib\site-packages (from pmdarima) (1.11.1) Requirement already satisfied: statsmodels>=0.13.2 in d:\christ\python\lib\site-packages (from pmdarima) (0.14.0) Requirement already satisfied: urllib3 in d:\christ\python\lib\site-packages (from pmdarima) (1.26.16) Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in d:\christ\python\lib\site-packages (from pmdarima) (68.0.0) Requirement already satisfied: packaging>=17.1 in d:\christ\python\lib\site-packages (from pmdarima) (23.1) Requirement already satisfied: python-dateutil>=2.8.2 in d:\christ\python\lib\site-packages (from pandas>=0.19->pmdarima) (2.8.2) Requirement already satisfied: pytz>=2020.1 in d:\christ\python\lib\site-packages (from pandas>=0.19->pmdarima) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in d:\christ\python\lib\site-packages (from pandas>=0.19->pmdarima) (2023.3) Requirement already satisfied: threadpoolctl>=2.0.0 in d:\christ\python\lib\site-packages (from scikit-learn>=0.22->pmdarima) (2.2.0) Requirement already satisfied: patsy>=0.5.2 in d:\christ\python\lib\site-packages (from 
statsmodels>=0.13.2->pmdarima) (0.5.3) Requirement already satisfied: six in d:\christ\python\lib\site-packages (from patsy>=0.5.2->statsmodels>=0.13.2->pmdarima) (1.16.0)
from pmdarima import auto_arima
# Search SARIMA orders on the monthly training series
arima_model = auto_arima(train_data['Adj Close'],
                         seasonal=True,
                         m=12,  # Monthly seasonality (12 periods per year)
                         stepwise=True,  # stepwise search is faster than a full grid
                         suppress_warnings=True)
# Print the model summary
print(arima_model.summary())
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 28
Model: SARIMAX(0, 1, 0) Log Likelihood -146.471
Date: Mon, 22 Jul 2024 AIC 294.942
Time: 23:07:58 BIC 296.238
Sample: 04-30-2021 HQIC 295.327
- 07-31-2023
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
sigma2 3017.5643 897.887 3.361 0.001 1257.739 4777.390
===================================================================================
Ljung-Box (L1) (Q): 0.12 Jarque-Bera (JB): 0.12
Prob(Q): 0.72 Prob(JB): 0.94
Heteroskedasticity (H): 1.48 Skew: -0.04
Prob(H) (two-sided): 0.57 Kurtosis: 2.69
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Number of periods to forecast (matches the 8-month test set)
n_periods = 8
# Generate point forecast plus confidence intervals
forecast, conf_int = arima_model.predict(n_periods=n_periods, return_conf_int=True)
# Plot the original data, fitted values, and forecast
plt.figure(figsize=(12, 6))
plt.plot(train_data['Adj Close'], label='Original Data')
plt.plot(forecast.index, forecast, label='Forecast', color='green')
# Shade the confidence band around the forecast
plt.fill_between(forecast.index,
                 conf_int[:, 0],
                 conf_int[:, 1],
                 color='k', alpha=.15)
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Auto ARIMA Forecasting')
plt.show()
len(forecast)
8
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the ARIMA forecast. Work on plain arrays: subtracting a Series
# from a DataFrame aligns on column labels and produced an all-NaN MAPE.
y_true = test_data['Adj Close'].values
pred = np.asarray(forecast)
# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_true, pred))
print(f'RMSE: {rmse}')
# Compute MAE
mae = mean_absolute_error(y_true, pred)
print(f'MAE: {mae}')
# Compute MAPE — the denominator is the actual value, not the forecast,
# per the standard definition of mean absolute percentage error.
mape = np.mean(np.abs((y_true - pred) / y_true)) * 100
print(f'MAPE: {mape}')
# Compute R-squared
r2 = r2_score(y_true, pred)
print(f'R-squared: {r2}')
RMSE: 151.20819480061095 MAE: 131.16513878408097 MAPE: nan R-squared: -3.0398294980017004
# Work on the full daily series for the remaining models
daily_data= df.copy()
# Plot the daily adjusted close
plt.figure(figsize=(12, 6))
plt.plot(daily_data['Adj Close'])
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()
# Fit auto_arima on the daily series.
# NOTE(review): m=7 implies a 7-observation weekly cycle, but the index holds
# trading days (~5 per week), so the seasonal period may not line up — confirm.
arima_model = auto_arima(daily_data['Adj Close'],
                         seasonal=True,
                         m=7,  # Weekly seasonality
                         stepwise=True,
                         suppress_warnings=True)
# Print the model summary
print(arima_model.summary())
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 740
Model: SARIMAX(3, 1, 2) Log Likelihood -3259.259
Date: Mon, 22 Jul 2024 AIC 6530.519
Time: 23:08:42 BIC 6558.150
Sample: 0 HQIC 6541.173
- 740
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.3772 0.029 -12.905 0.000 -0.434 -0.320
ar.L2 -0.9635 0.015 -65.139 0.000 -0.992 -0.934
ar.L3 0.0709 0.028 2.516 0.012 0.016 0.126
ma.L1 0.4359 0.012 35.087 0.000 0.412 0.460
ma.L2 0.9799 0.013 73.897 0.000 0.954 1.006
sigma2 394.6236 10.481 37.652 0.000 374.082 415.165
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 1680.79
Prob(Q): 0.94 Prob(JB): 0.00
Heteroskedasticity (H): 0.85 Skew: -0.29
Prob(H) (two-sided): 0.21 Kurtosis: 10.37
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Generate one-step-ahead in-sample predictions (fitted values)
fitted_values = arima_model.predict_in_sample()
fitted_values
Date
2021-04-01 0.000000
2021-04-05 1427.276808
2021-04-06 1389.822922
2021-04-07 1383.489031
2021-04-08 1388.542828
...
2024-03-21 1415.241500
2024-03-22 1429.154193
2024-03-26 1417.546726
2024-03-27 1405.902445
2024-03-28 1427.569342
Name: predicted_mean, Length: 740, dtype: float64
# Number of periods to forecast
n_periods = 60  # forecast the next 60 days
# Generate point forecast plus confidence intervals
forecast, conf_int = arima_model.predict(n_periods=n_periods, return_conf_int=True)
D:\CHRIST\PYTHON\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:836: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`. D:\CHRIST\PYTHON\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:836: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
len(forecast)
60
len(future_dates)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[45], line 1 ----> 1 len(future_dates) NameError: name 'future_dates' is not defined
# Create a future date index for the forecast horizon.
# The history contains trading days only, so extend with business-day
# frequency ("B"); a plain calendar range would label forecast points
# onto weekends that the exchange never trades.
last_date = daily_data.index[-1]
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=n_periods, freq="B")
# Wrap the point forecast and its confidence band in date-indexed frames
forecast_df = pd.DataFrame(np.asarray(forecast), index=future_dates, columns=['forecast'])
conf_int_df = pd.DataFrame(conf_int, index=future_dates, columns=['lower_bound', 'upper_bound'])
# Plot the original data together with the forecast and its confidence band
plt.figure(figsize=(12, 6))
plt.plot(daily_data['Adj Close'], label='Original Data')
plt.plot(forecast_df, label='Forecast', color='green')
plt.fill_between(future_dates,
                 conf_int_df['lower_bound'],
                 conf_int_df['upper_bound'],
                 color='k', alpha=.15)
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Auto ARIMA Forecasting')
plt.show()
!pip install tensorflow
Requirement already satisfied: tensorflow in d:\christ\python\lib\site-packages (2.17.0) Requirement already satisfied: tensorflow-intel==2.17.0 in d:\christ\python\lib\site-packages (from tensorflow) (2.17.0) Requirement already satisfied: absl-py>=1.0.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (2.1.0) Requirement already satisfied: astunparse>=1.6.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (1.6.3) Requirement already satisfied: flatbuffers>=24.3.25 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (24.3.25) Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (0.6.0) Requirement already satisfied: google-pasta>=0.1.1 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (0.2.0) Requirement already satisfied: h5py>=3.10.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (3.11.0) Requirement already satisfied: libclang>=13.0.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (18.1.1) Requirement already satisfied: ml-dtypes<0.5.0,>=0.3.1 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (0.4.0) Requirement already satisfied: opt-einsum>=2.3.2 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (3.3.0) Requirement already satisfied: packaging in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (23.1) Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (4.25.3) Requirement already satisfied: requests<3,>=2.21.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (2.31.0) Requirement already satisfied: 
setuptools in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (68.0.0) Requirement already satisfied: six>=1.12.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (1.16.0) Requirement already satisfied: termcolor>=1.1.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (2.4.0) Requirement already satisfied: typing-extensions>=3.6.6 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (4.7.1) Requirement already satisfied: wrapt>=1.11.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (1.14.1) Requirement already satisfied: grpcio<2.0,>=1.24.3 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (1.65.1) Requirement already satisfied: tensorboard<2.18,>=2.17 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (2.17.0) Requirement already satisfied: keras>=3.2.0 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (3.4.1) Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (0.31.0) Requirement already satisfied: numpy<2.0.0,>=1.23.5 in d:\christ\python\lib\site-packages (from tensorflow-intel==2.17.0->tensorflow) (1.24.3) Requirement already satisfied: wheel<1.0,>=0.23.0 in d:\christ\python\lib\site-packages (from astunparse>=1.6.0->tensorflow-intel==2.17.0->tensorflow) (0.38.4) Requirement already satisfied: rich in d:\christ\python\lib\site-packages (from keras>=3.2.0->tensorflow-intel==2.17.0->tensorflow) (13.7.1) Requirement already satisfied: namex in d:\christ\python\lib\site-packages (from keras>=3.2.0->tensorflow-intel==2.17.0->tensorflow) (0.0.8) Requirement already satisfied: optree in d:\christ\python\lib\site-packages (from keras>=3.2.0->tensorflow-intel==2.17.0->tensorflow) (0.12.1) Requirement already satisfied: 
charset-normalizer<4,>=2 in d:\christ\python\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.17.0->tensorflow) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in d:\christ\python\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.17.0->tensorflow) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in d:\christ\python\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.17.0->tensorflow) (1.26.16) Requirement already satisfied: certifi>=2017.4.17 in d:\christ\python\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.17.0->tensorflow) (2023.7.22) Requirement already satisfied: markdown>=2.6.8 in d:\christ\python\lib\site-packages (from tensorboard<2.18,>=2.17->tensorflow-intel==2.17.0->tensorflow) (3.4.1) Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in d:\christ\python\lib\site-packages (from tensorboard<2.18,>=2.17->tensorflow-intel==2.17.0->tensorflow) (0.7.2) Requirement already satisfied: werkzeug>=1.0.1 in d:\christ\python\lib\site-packages (from tensorboard<2.18,>=2.17->tensorflow-intel==2.17.0->tensorflow) (2.2.3) Requirement already satisfied: MarkupSafe>=2.1.1 in d:\christ\python\lib\site-packages (from werkzeug>=1.0.1->tensorboard<2.18,>=2.17->tensorflow-intel==2.17.0->tensorflow) (2.1.1) Requirement already satisfied: markdown-it-py>=2.2.0 in d:\christ\python\lib\site-packages (from rich->keras>=3.2.0->tensorflow-intel==2.17.0->tensorflow) (2.2.0) Requirement already satisfied: pygments<3.0.0,>=2.13.0 in d:\christ\python\lib\site-packages (from rich->keras>=3.2.0->tensorflow-intel==2.17.0->tensorflow) (2.15.1) Requirement already satisfied: mdurl~=0.1 in d:\christ\python\lib\site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.2.0->tensorflow-intel==2.17.0->tensorflow) (0.1.0)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
data.head()
| Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2021-04-01 | 1499.400024 | 1499.400024 | 1465.000000 | 1486.750000 | 1427.248291 | 17881881 |
| 2021-04-05 | 1480.000000 | 1485.000000 | 1431.000000 | 1449.599976 | 1391.584961 | 8003293 |
| 2021-04-06 | 1460.000000 | 1462.650024 | 1432.650024 | 1440.250000 | 1382.609131 | 7537867 |
| 2021-04-07 | 1439.300049 | 1456.699951 | 1421.550049 | 1447.199951 | 1389.280884 | 12544090 |
| 2021-04-08 | 1453.000000 | 1460.900024 | 1430.500000 | 1432.800049 | 1375.457397 | 8806796 |
# Initialize one scaler per role. Re-fitting a single MinMaxScaler on the
# features and then on the target (as before) silently discards the feature
# fit and makes later inverse_transform calls ambiguous.
feature_scaler = MinMaxScaler()
scaler = MinMaxScaler()  # target scaler; keeps the original name for reuse below
# Select features (excluding 'Adj Close') and target ('Adj Close')
features = data.drop(columns=['Adj Close'])
target = data[['Adj Close']]
# Fit each scaler on its own data
scaled_features = feature_scaler.fit_transform(features)
scaled_target = scaler.fit_transform(target)
# Create DataFrame with scaled features and target
scaled_df = pd.DataFrame(scaled_features, columns=features.columns, index=df.index)
scaled_df['Adj Close'] = scaled_target
import numpy as np
# Function to create sequences
def create_sequences(scaled_df, target_col, sequence_length):
    """Slice a 2-D array into overlapping windows plus next-step targets.

    Returns (windows, targets): windows[i] covers rows i..i+sequence_length-1
    and targets[i] is row i+sequence_length of column target_col.
    """
    starts = range(len(scaled_df) - sequence_length)
    windows = [scaled_df[s:s + sequence_length] for s in starts]
    next_vals = [scaled_df[s + sequence_length, target_col] for s in starts]
    return np.array(windows), np.array(next_vals)
# Convert DataFrame to NumPy array
data_array = scaled_df.values
# Define the target column index and sequence length
target_col = scaled_df.columns.get_loc('Adj Close')
sequence_length = 30  # 30 trading days of history per sample
# Create sequences (X: windows of all 6 columns, y: next 'Adj Close')
X, y = create_sequences(data_array, target_col, sequence_length)
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
Shape of X: (710, 30, 6) Shape of y: (710,)
# Split the data into training and testing sets (80% training, 20% testing).
# The split is chronological: the earliest 80% trains, the latest 20% tests.
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# Build the LSTM model: two stacked 50-unit LSTM layers with dropout,
# ending in a single-unit regression head.
# NOTE(review): Keras warns about passing input_shape to a layer in a
# Sequential model; an explicit Input(shape=(sequence_length, 6)) first
# layer would silence the warning.
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(sequence_length, 6)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))
D:\CHRIST\PYTHON\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ lstm (LSTM) │ (None, 30, 50) │ 11,400 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout (Dropout) │ (None, 30, 50) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ lstm_1 (LSTM) │ (None, 50) │ 20,200 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_1 (Dropout) │ (None, 50) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ (None, 1) │ 51 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 31,651 (123.64 KB)
Trainable params: 31,651 (123.64 KB)
Non-trainable params: 0 (0.00 B)
# Compile the model (Adam optimizer, MSE loss for regression)
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the model; shuffle=False preserves temporal order of the batches
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), shuffle=False)
# Evaluate the model on the held-out set (loss is MSE in scaled units)
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
Epoch 1/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 6s 80ms/step - loss: 0.0648 - val_loss: 0.0154 Epoch 2/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 0.0258 - val_loss: 0.0110 Epoch 3/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0243 - val_loss: 0.0112 Epoch 4/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0146 - val_loss: 0.0097 Epoch 5/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0131 - val_loss: 0.0094 Epoch 6/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 36ms/step - loss: 0.0146 - val_loss: 0.0091 Epoch 7/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0148 - val_loss: 0.0089 Epoch 8/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0126 - val_loss: 0.0087 Epoch 9/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 46ms/step - loss: 0.0113 - val_loss: 0.0085 Epoch 10/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 28ms/step - loss: 0.0127 - val_loss: 0.0079 Epoch 11/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 56ms/step - loss: 0.0125 - val_loss: 0.0076 Epoch 12/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - loss: 0.0117 - val_loss: 0.0075 Epoch 13/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 43ms/step - loss: 0.0120 - val_loss: 0.0071 Epoch 14/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0120 - val_loss: 0.0070 Epoch 15/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0121 - val_loss: 0.0069 Epoch 16/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 37ms/step - loss: 0.0103 - val_loss: 0.0067 Epoch 17/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 35ms/step - loss: 0.0101 - val_loss: 0.0066 Epoch 18/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 34ms/step - loss: 0.0091 - val_loss: 0.0065 Epoch 19/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 40ms/step - loss: 0.0103 - val_loss: 0.0064 Epoch 20/20 18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 42ms/step - loss: 0.0101 - val_loss: 0.0061 5/5 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - loss: 0.0057 Test Loss: 0.006123048719018698
# Predict on the test set
y_pred = model.predict(X_test)
# Invert the scaling back to price units. The scaler was last fit on the
# single 'Adj Close' column, so invert the 1-column arrays directly instead
# of zero-padding to six columns and slicing column 5 back out.
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()
y_pred_scaled = scaler.inverse_transform(np.asarray(y_pred).reshape(-1, 1)).ravel()
5/5 ━━━━━━━━━━━━━━━━━━━━ 1s 97ms/step
# Print the first 10 test-set predictions alongside the true values
print("Predictions vs True Values:")
for i in range(10):
    print(f"Prediction: {y_pred_scaled[i]}, True Value: {y_test_scaled[i]}")
Predictions vs True Values: Prediction: 1557.1068279254832, True Value: 1557.4324951171875 Prediction: 1557.772691356753, True Value: 1550.2801513671875 Prediction: 1556.071010218293, True Value: 1553.4864501953125 Prediction: 1554.1717759384337, True Value: 1563.203857421875 Prediction: 1555.15807647998, True Value: 1553.683837890625 Prediction: 1555.6385422165404, True Value: 1574.0062255859375 Prediction: 1558.06130181755, True Value: 1589.1494140625 Prediction: 1563.5076745172046, True Value: 1601.5303955078125 Prediction: 1571.9104797663895, True Value: 1609.8172607421875 Prediction: 1581.810819768114, True Value: 1614.8485107421875
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the LSTM predictions in original price units.
# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_test_scaled, y_pred_scaled))
print(f'RMSE: {rmse}')
# Compute MAE
mae = mean_absolute_error(y_test_scaled, y_pred_scaled)
print(f'MAE: {mae}')
# Compute MAPE — the denominator is the true value, not the prediction,
# per the standard definition of mean absolute percentage error.
mape = np.mean(np.abs((y_test_scaled - y_pred_scaled) / y_test_scaled)) * 100
print(f'MAPE: {mape}')
# Compute R-squared
r2 = r2_score(y_test_scaled, y_pred_scaled)
print(f'R-squared: {r2}')
RMSE: 35.62022531125813 MAE: 24.270046190543773 MAPE: 1.5872152306458873 R-squared: 0.8400233698447482
# Plot the LSTM predictions against the true test values
plt.figure(figsize=(14, 7))
plt.plot(y_test_scaled, label='True Values')
plt.plot(y_pred_scaled, label='LSTM Predictions')
plt.title('LSTM: Predictions vs True Values')
plt.xlabel('Time')
plt.ylabel('Close Price')
plt.legend()
plt.show()
from sklearn.ensemble import RandomForestRegressor #ensemble model
from sklearn.tree import DecisionTreeRegressor #simple algo
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import numpy as np
def create_sequences(data, target_col, sequence_length):
    """Build rolling feature windows and next-step labels from array data.

    Parameters:
    - data (np.ndarray): 2-D array of observations (rows) by features (cols).
    - target_col (int): Column index of the prediction target.
    - sequence_length (int): Number of consecutive rows per window.

    Returns:
    - np.ndarray: windows, shape (samples, sequence_length, num_features)
    - np.ndarray: labels, shape (samples,)
    """
    n_windows = len(data) - sequence_length
    n_features = data.shape[1]
    # Preallocate float output buffers, then fill them window by window.
    windows = np.zeros((n_windows, sequence_length, n_features))
    labels = np.zeros(n_windows)
    for start in range(n_windows):
        stop = start + sequence_length
        windows[start] = data[start:stop]
        # The label is the target column one step past the window.
        labels[start] = data[stop, target_col]
    return windows, labels
# Example usage
sequence_length = 30
# Convert DataFrame to NumPy array
data_array = scaled_df.values
# Define the target column index
target_col = scaled_df.columns.get_loc('Adj Close')
# Create sequences
X, y = create_sequences(data_array, target_col, sequence_length)
# Flatten each (sequence_length, num_features) window into one row for tree models
num_samples, seq_length, num_features = X.shape
X_flattened = X.reshape(num_samples, seq_length * num_features)
# Split data into train and test sets.
# NOTE(review): this split shuffles (default shuffle=True), so training rows
# can postdate test rows — optimistic for a time series; confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X_flattened, y, test_size=0.2, random_state=42)
# Train Decision Tree model on the flattened windows
dt_model = DecisionTreeRegressor()
dt_model.fit(X_train, y_train)
# Make predictions
y_pred_dt = dt_model.predict(X_test)
# Evaluate the model (targets are still min-max scaled, so MSE is unitless)
mse_dt = mean_squared_error(y_test, y_pred_dt)
print(f'MSE (Decision Tree): {mse_dt}')
MSE (Decision Tree): 0.0035806542382392664
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the Decision Tree on the (scaled) test targets.
# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred_dt))
print(f'RMSE: {rmse}')
# Compute MAE
mae = mean_absolute_error(y_test, y_pred_dt)
print(f'MAE: {mae}')
# Compute MAPE for the tree's own predictions. The original mixed in
# y_pred_scaled (LSTM output in price units) against scaled targets and
# divided by the prediction, producing a meaningless ~534889% figure.
mape = np.mean(np.abs((y_test - y_pred_dt) / y_test)) * 100
print(f'MAPE: {mape}')
# Compute R-squared
r2 = r2_score(y_test, y_pred_dt)
print(f'R-squared: {r2}')
RMSE: 0.059838568149975534 MAE: 0.04567951741443901 MAPE: 534889.7363207806 R-squared: 0.91843115194108
# Train and evaluate the Random Forest model (100 trees) on the same split
rf_model = RandomForestRegressor(n_estimators=100)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest Mean Squared Error: {mse_rf}")
Random Forest Mean Squared Error: 0.0019725053651325443
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Evaluate the Random Forest on the (scaled) test targets.
# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))
print(f'RMSE: {rmse}')
# Compute MAE
mae = mean_absolute_error(y_test, y_pred_rf)
print(f'MAE: {mae}')
# Compute MAPE for the forest's own predictions. The original mixed in
# y_pred_scaled (LSTM output in price units) against scaled targets and
# divided by the prediction, producing a meaningless ~391287% figure.
mape = np.mean(np.abs((y_test - y_pred_rf) / y_test)) * 100
print(f'MAPE: {mape}')
# Compute R-squared
r2 = r2_score(y_test, y_pred_rf)
print(f'R-squared: {r2}')
RMSE: 0.044412896383061354 MAE: 0.031993516246740766 MAPE: 391287.1117373367 R-squared: 0.9550654769439513
# Print the first 10 Decision Tree predictions alongside the true values
print("\nDecision Tree Predictions vs True Values:")
for i in range(10):
    print(f"Prediction: {y_pred_dt[i]}, True Value: {y_test[i]}")
Decision Tree Predictions vs True Values: Prediction: 0.14301964903492514, True Value: 0.17023085262915405 Prediction: 0.24329427374889656, True Value: 0.2576497906056945 Prediction: 0.4252362976587576, True Value: 0.3885276001725888 Prediction: 0.3695623783717683, True Value: 0.38513561891389525 Prediction: 0.012319899042366522, True Value: 0.06984923127337517 Prediction: 0.6467338269480827, True Value: 0.5516360698283145 Prediction: 0.7998599657986172, True Value: 0.7564080664205117 Prediction: 0.6467338269480827, True Value: 0.6056444348740402 Prediction: 0.2637563758869792, True Value: 0.35815744810595795 Prediction: 0.3370568522231565, True Value: 0.33337257437406276
# Plot the Decision Tree predictions against the true test values
plt.figure(figsize=(14, 7))
plt.plot(y_test, label='True Values')
plt.plot(y_pred_dt, label='Decision Tree Predictions')
plt.title('Decision Tree: Predictions vs True Values')
plt.xlabel('Time')
plt.ylabel('Close Price')
plt.legend()
plt.show()
print("\nRandom Forest Predictions vs True Values:")
for i in range(10):
print(f"Prediction: {y_pred_rf[i]}, True Value: {y_test[i]}")
Random Forest Predictions vs True Values: Prediction: 0.17902225986024983, True Value: 0.17023085262915405 Prediction: 0.231048315017421, True Value: 0.2576497906056945 Prediction: 0.39621869040937313, True Value: 0.3885276001725888 Prediction: 0.36478687153886624, True Value: 0.38513561891389525 Prediction: 0.05818124480268493, True Value: 0.06984923127337517 Prediction: 0.5930390264217347, True Value: 0.5516360698283145 Prediction: 0.7918658471278109, True Value: 0.7564080664205117 Prediction: 0.5662165799743584, True Value: 0.6056444348740402 Prediction: 0.2944056980136434, True Value: 0.35815744810595795 Prediction: 0.35119080021957105, True Value: 0.33337257437406276
# Plot the Random Forest predictions against the true test values
plt.figure(figsize=(14, 7))
plt.plot(y_test, label='True Values')
plt.plot(y_pred_rf, label='Random Forest Predictions')
plt.title('Random Forest: Predictions vs True Values')
plt.xlabel('Time')
plt.ylabel('Close Price')
plt.legend()
plt.show()
# Overlay both tree-model predictions against the true test values
plt.figure(figsize=(14, 7))
plt.plot(y_test, label='True Values')
plt.plot(y_pred_dt, label='Decision Tree Predictions')
plt.plot(y_pred_rf, label='Random Forest Predictions')
plt.title('Decision Tree & Random Forest: Predictions vs True Values')
plt.xlabel('Time')
plt.ylabel('Close Price')
plt.legend()
plt.show()